# Does including the data that is derived from correcting percentages and NASS county statistics change the distribution of the data?

# Trial run: derive LOSS columns so their distributions can be compared with
# the raw self-reported Operations losses before committing the change to S.
# BUG FIX: the condition used `&`, so when only ONE of the percentage or the
# NASS yield was NA the product evaluated to NA instead of falling back to the
# self-reported loss. `|` falls back whenever the product cannot be formed.
test <- S %>%
  mutate(LOSS_2020 = ifelse(is.na(Operations_2020Loss_perc) | is.na(NASS_County_Yield_2020), Operations_2020Loss, Operations_2020Loss_perc*NASS_County_Yield_2020)) %>%
  relocate(LOSS_2020, .after = Operations_2020Loss_perc) %>%
  mutate(LOSS_2021 = ifelse(is.na(Operations_2021Loss_perc) | is.na(NASS_County_Yield_2021), Operations_2021Loss, Operations_2021Loss_perc*NASS_County_Yield_2021)) %>%
  relocate(LOSS_2021, .after = Operations_2021Loss_perc)

# Visual check: derived vs. raw loss distributions for each year.
hist(test$LOSS_2020)
hist(test$Operations_2020Loss)
hist(test$LOSS_2021)
hist(test$Operations_2021Loss)

# No, so continue with this data included. 
S <- S %>%
  # create final loss columns for each year: prefer perc * county yield when
  # BOTH pieces are available, otherwise fall back to the raw reported loss.
  # BUG FIX: was `is.na(...) & is.na(...)` — a single missing input then made
  # the product branch return NA instead of using Operations_*Loss.
  mutate(LOSS_2020 = ifelse(is.na(Operations_2020Loss_perc) | is.na(NASS_County_Yield_2020), Operations_2020Loss, Operations_2020Loss_perc*NASS_County_Yield_2020)) %>%
  relocate(LOSS_2020, .after = Operations_2020Loss_perc) %>%
  mutate(LOSS_2021 = ifelse(is.na(Operations_2021Loss_perc) | is.na(NASS_County_Yield_2021), Operations_2021Loss, Operations_2021Loss_perc*NASS_County_Yield_2021)) %>%
  relocate(LOSS_2021, .after = Operations_2021Loss_perc) %>%
  # remove other columns used to create the final LOSS_2020 and LOSS_2021 columns
  select(!c(starts_with('NASS'), contains('Operations_20'), starts_with('Complete')))

# HEATMAPS ----

# heat map of responses

# summarize survey data to count responses by county
# summarize survey data to count responses by county (one row per County/State)
# `.groups = 'drop'` returns an ungrouped frame (all the join needs) and
# silences the summarise() regrouping message.
S_count <- S %>% group_by(County, State) %>% summarise(Responses = n(), .groups = 'drop')
# combine geo info with survey data; counties with no responses keep NA
combined_counties <- left_join(combined_counties_raw, S_count, by = c('NAME'='County', 'STUSPS'='State'))

# Plot the map
# Choropleth of response counts per county; axis furniture is suppressed so
# only the county polygons and the fill legend remain.
ggplot(data = combined_counties) +
  geom_sf(aes(fill = Responses), color = "black") +
  scale_fill_viridis_c(option = "plasma") +
  labs(title = "County-Level Count of Survey Responses") +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

# Intermediates are no longer needed once the map is drawn.
rm(S_count, combined_counties)
# heat map of losses 2020

# Mean reported 2020 loss per county.
# BUG FIX: the old `ifelse(all(LOSS_2020 == 0), 0, mean(..., na.rm = TRUE))`
# returned NA for any county whose losses were all zeros plus at least one NA
# (all() propagates NA), and the shortcut was redundant anyway: the NA-stripped
# mean of an all-zero vector is already 0. Counties with only NA losses now
# yield NaN, which plots as missing exactly like the previous NA.
S_loss_2020 <- S %>%
  group_by(County, State) %>%
  summarise(Loss = mean(LOSS_2020, na.rm = TRUE), .groups = 'drop')
# Combine geo info with survey data
combined_counties <- left_join(combined_counties_raw, S_loss_2020, by = c('NAME'='County', 'STUSPS'='State'))

# Plot the map
# 2020 loss choropleth; the fill scale is pinned to 0-60 so both years share
# a comparable legend.
ggplot(data = combined_counties) +
  geom_sf(aes(fill = Loss), color = "black") +
  scale_fill_viridis_c(option = "plasma", limits = c(0, 60)) +
  labs(title = "County-Level Reported Losses Due to Tar Spot in 2020") +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

rm(combined_counties)
# heat map of losses 2021

# Mean reported 2021 loss per county.
# BUG FIX: same as 2020 — the unguarded all(LOSS_2021 == 0) produced NA for
# counties mixing zeros and NAs, and was redundant because the NA-stripped
# mean of an all-zero vector is already 0.
S_loss_2021 <- S %>%
  group_by(County, State) %>%
  summarise(Loss = mean(LOSS_2021, na.rm = TRUE), .groups = 'drop')
# Combine geo info with survey data
combined_counties <- left_join(combined_counties_raw, S_loss_2021, by = c('NAME'='County', 'STUSPS'='State'))

# Plot the map
# 2021 loss choropleth, same 0-60 fill limits as the 2020 map for comparison.
ggplot(data = combined_counties) +
  geom_sf(aes(fill = Loss), color = "black") +
  scale_fill_viridis_c(option = "plasma", limits = c(0, 60)) +
  labs(title = "County-Level Reported Losses Due to Tar Spot in 2021") +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

rm(combined_counties)

# LOSS HISTOGRAMS ----

# Tag each county summary with its year and stack them so the two loss
# distributions can be faceted side by side.
S_loss <- rbind(
  S_loss_2020 %>% mutate(Year = '2020'),
  S_loss_2021 %>% mutate(Year = '2021')
)

# One histogram panel per year (1-unit bins; NA losses are dropped by
# stat_bin with a warning).
ggplot(S_loss, aes(x = Loss)) +
  geom_histogram(binwidth = 1) +
  facet_grid(~Year)

rm(S_loss_2020, S_loss_2021, S_loss)

# WEATHER CORRELATIONS ----

# function to calculate correlation test and return p-values
# Pearson correlation of x against y.
# Returns a named numeric vector c(cor = estimate, p_value = p).
# FIX: the former `use = "complete.obs"` argument was silently swallowed by
# `...` of cor.test(), which already drops incomplete pairs itself; it has
# been removed. unname() keeps the element named "cor" (not "cor.cor").
cor_test_results <- function(y, x) {
  cor_test <- cor.test(x, y, method = 'pearson')
  c(cor = unname(cor_test$estimate), p_value = cor_test$p.value)
}

# Apply the function to each x variable
# Correlate Loss against every other column of DF and collect estimates + p.
results_df <- t(sapply(DF[ , -1], function(x) cor_test_results(DF$Loss, x)))

# Convert the results into a dataframe
results_df <- as.data.frame(results_df)
colnames(results_df) <- c("Correlation", "p")
# Significance stars.
# BUG FIX: the nested if_else chain used strict inequalities on both sides
# (e.g. p > 0.001 & p < 0.01), so p exactly equal to 0.001/0.01/0.05 fell
# through to "". case_when() evaluates in order and closes those gaps.
results_df <- results_df %>%
  mutate(code = case_when(
    p < 0.001 ~ "***",
    p < 0.01  ~ "**",
    p < 0.05  ~ "*",
    TRUE      ~ ""
  ))
results_df$Var <- rownames(results_df)

# Bar chart of correlation strength with significance stars above/below bars.
# NOTE(review): reorder(Var, -Correlation, decreasing = TRUE) double-negates;
# kept as-is to preserve the existing bar ordering — confirm intended order.
results_df %>% 
  ggplot(aes(x=reorder(Var, -Correlation, decreasing = TRUE), y = Correlation)) +
  geom_bar(stat = "identity") +
  labs(x="Variable", y = "Correlation strength (Pearson)", title = '') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  # stars sit just above positive bars and just below negative ones
  geom_text(aes(label = code, vjust = ifelse(Correlation >= 0, -0.25, 1.65))) +
  ylim(-0.7, 0.7)

rm(results_df)

# ENVIRONMENTAL SCATTERPLOTS ----

# Define data for MA
# Scatter each 2020 weather variable (columns 2:17) against Loss.
response <- set_names(names(DF_2020)[2:17])
quant <- 'Loss'

# Plain x-vs-Loss scatter for one predictor name.
scatter_fun <- function(x, y) {
  ggplot(DF_2020, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    theme_bw()
}

# One plot per predictor, named by predictor.
explore.plots <- map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $ppt_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $ppt_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $ppt_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Define data for MA

# Same exploratory scatters as 2020, now over the 2021 frame.
response <- set_names(names(DF_2021)[2:17])
quant <- 'Loss'

# Plain x-vs-Loss scatter for one predictor name.
scatter_fun <- function(x, y) {
  ggplot(DF_2021, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    theme_bw()
}

# One plot per predictor, named by predictor.
explore.plots <- map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $ppt_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $ppt_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $ppt_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmin_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmean_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## $tmax_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Define data for MA

# Exploratory scatters over the combined (both-year) frame DF.
response <- set_names(names(DF)[2:17])
quant <- 'Loss'

# Plain x-vs-Loss scatter for one predictor name.
scatter_fun <- function(x, y) {
  ggplot(DF, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    theme_bw()
}

# One plot per predictor, named by predictor.
explore.plots <- map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June

## 
## $ppt_July

## 
## $ppt_Aug

## 
## $ppt_Sept

## 
## $tmin_June

## 
## $tmin_July

## 
## $tmin_Aug

## 
## $tmin_Sept

## 
## $tmean_June

## 
## $tmean_July

## 
## $tmean_Aug

## 
## $tmean_Sept

## 
## $tmax_June

## 
## $tmax_July

## 
## $tmax_Aug

## 
## $tmax_Sept

# Drop September columns and zero-loss rows before the annotated scatterplots.
DF <- DF %>% select(!contains('Sept')) %>% filter(Loss > 0)

response = names(DF[c(2:13)]) 
quant <- 'Loss' 
response = set_names(response)

# Scatter of predictor vs Loss with a scaled density trace, loess and linear
# fits, and regression-equation / R^2 annotations (ggpubr).
# FIX: `..scaled..` was deprecated in ggplot2 3.4.0 — replaced with
# after_stat(scaled); likewise ..rr.label../..p.label.. in stat_cor().
scatter_fun = function(x, y) {
  ggplot(DF, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    # density rescaled onto the 0-60 loss axis (factor 18 chosen by eye)
    stat_density(aes(x = .data[[x]], y = 18 * after_stat(scaled)), position = "identity", geom = "line", color = 'red', linewidth = 1.5) +
    geom_smooth(method = 'loess', color = 'blue', se = FALSE) +
    geom_smooth(method = 'lm', color = 'darkgrey', se = FALSE) +
    stat_regline_equation() +
    stat_cor(aes(label = paste(after_stat(rr.label), after_stat(p.label), sep = "*`,`~")), label.x.npc = "centre") + 
    theme_bw() +
    ylim(0, 60)
}


# Output plots 
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June
## Warning: The dot-dot notation (`..scaled..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(scaled)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $ppt_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $ppt_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmin_June
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmin_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmin_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmean_June
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmean_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

## 
## $tmean_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

## 
## $tmax_June
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmax_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## 
## $tmax_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

response = names(DF[c(2:13)]) 
quant <- 'Loss' 
response = set_names(response)

# Histogram + kernel density of each predictor on the density scale so the
# two layers are directly comparable. (`y` is accepted but unused; kept so
# the shared map(response, ~scatter_fun(.x, quant)) call pattern still works.)
# FIX: `..density..` was deprecated in ggplot2 3.4.0 — after_stat(density).
# Previously commented-out loess/lm/stat_cor layers were dead code, removed.
scatter_fun = function(x, y) {
  ggplot(DF, aes(x = .data[[x]])) +
    geom_histogram(aes(y = after_stat(density)), fill = 'gray', color = 'black', binwidth = 0.25) +
    geom_density(color = 'red', linewidth = 1.5) +
    theme_bw() 
}


# Output plots 
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June

## 
## $ppt_July

## 
## $ppt_Aug

## 
## $tmin_June

## 
## $tmin_July

## 
## $tmin_Aug

## 
## $tmean_June

## 
## $tmean_July

## 
## $tmean_Aug

## 
## $tmax_June

## 
## $tmax_July

## 
## $tmax_Aug

# Gaussian (bell-curve) fit of Loss vs. each precipitation variable:
# Loss ~ k * exp(-(x - mu)^2 / (2 * sigma^2)). Only non-zero losses are used.
G_ppt <- DF %>%
  filter(Loss > 0) %>%
  select(Loss, contains('ppt'))

# Predictor columns (the three monthly ppt_* columns); column 1 is Loss.
response = names(G_ppt[c(2:4)]) 
quant <- 'Loss' 

# Fit the Gaussian once with nls() to obtain coefficients for the annotation;
# geom_smooth() then refits the same model for the drawn curve.
scatter_fun = function(x, y) {
  
  # Fit the nonlinear model using nls
  # NOTE(review): the formula indexes G_ppt[[x]] directly instead of naming a
  # column inside `data`; this works, but if nls() fails to converge from
  # these start values the error aborts the whole map() below — confirm the
  # start values suit all three months.
  model <- nls(Loss ~ k*exp(-1/2*(G_ppt[[x]]-mu)^2/sigma^2), 
               data = G_ppt, 
               start = c(mu = 100, sigma = 20, k = 10))
  
  # Extract coefficients (order: mu, sigma, k)
  coefficients <- coef(model)
  
  # Plot: points plus the refitted Gaussian, with fitted parameters annotated
  # in the top-right corner.
  ggplot(data = G_ppt, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    geom_smooth(method = 'nls',
                color = 'blue',
                formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
                method.args = list(start = c(mu = 100, sigma = 20, k = 10)),
                se = FALSE) +
    theme_bw() +
    theme(panel.border = element_blank()) +
    annotate("text", 
             x = Inf, y = Inf, vjust = 2, hjust = 1,
             #x = 0.9, y = 0.9, hjust = 1, vjust = 1, halign = 0.5, valign = 0.5,
             label = paste("Amplitude =", round(coefficients[3], 2),
                           ", Standard deviation =", round(coefficients[2], 2),
                           ", Mean =", round(coefficients[1], 2)))
}

# Output plots 
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

# Gaussian fit of Loss vs. each maximum-temperature variable (same model as
# the precipitation section, with temperature-appropriate start values).
G_tmax <- DF %>%
  filter(Loss > 0) %>%
  select(Loss, contains('tmax'))

# Predictor columns (the three monthly tmax_* columns); column 1 is Loss.
response = names(G_tmax[c(2:4)]) 
quant <- 'Loss' 

# nls() provides the annotated coefficients; geom_smooth() refits for drawing.
scatter_fun = function(x, y) {
  
  # Fit the nonlinear model using nls
  # NOTE(review): a non-converging fit here errors out the whole map() call.
  model <- nls(Loss ~ k*exp(-1/2*(G_tmax[[x]]-mu)^2/sigma^2), 
               data = G_tmax, 
               start = c(mu = 27, sigma = 5, k = 10))
  
  # Extract coefficients (order: mu, sigma, k)
  coefficients <- coef(model)
  
  # Plot with fitted parameters annotated top-right
  ggplot(data = G_tmax, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    geom_smooth(method = 'nls',
                color = 'blue',
                formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
                method.args = list(start = c(mu = 27, sigma = 5, k = 10)),
                se = FALSE) +
    theme_bw() +
    theme(panel.border = element_blank()) +
    annotate("text", 
             x = Inf, y = Inf, vjust = 2, hjust = 1,
             #x = 0.9, y = 0.9, hjust = 1, vjust = 1, halign = 0.5, valign = 0.5,
             label = paste("Amplitude =", round(coefficients[3], 2),
                           ", Standard deviation =", round(coefficients[2], 2),
                           ", Mean =", round(coefficients[1], 2)))
}

# Output plots 
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

# Gaussian fit of Loss vs. each minimum-temperature variable.
G_tmin <- DF %>%
  filter(Loss > 0) %>%
  select(Loss, contains('tmin'))

# Predictor columns (the three monthly tmin_* columns); column 1 is Loss.
response = names(G_tmin[c(2:4)]) 
quant <- 'Loss' 

# nls() provides the annotated coefficients; geom_smooth() refits for drawing.
scatter_fun = function(x, y) {
  
  # Fit the nonlinear model using nls
  # NOTE(review): a non-converging fit here errors out the whole map() call.
  model <- nls(Loss ~ k*exp(-1/2*(G_tmin[[x]]-mu)^2/sigma^2), 
               data = G_tmin, 
               start = c(mu = 16, sigma = 5, k = 10))
  
  # Extract coefficients (order: mu, sigma, k)
  coefficients <- coef(model)
  
  # Plot with fitted parameters annotated top-right
  ggplot(data = G_tmin, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    geom_smooth(method = 'nls',
                color = 'blue',
                formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
                method.args = list(start = c(mu = 16, sigma = 5, k = 10)),
                se = FALSE) +
    theme_bw() +
    theme(panel.border = element_blank()) +
    annotate("text", 
             x = Inf, y = Inf, vjust = 2, hjust = 1,
             #x = 0.9, y = 0.9, hjust = 1, vjust = 1, halign = 0.5, valign = 0.5,
             label = paste("Amplitude =", round(coefficients[3], 2),
                           ", Standard deviation =", round(coefficients[2], 2),
                           ", Mean =", round(coefficients[1], 2)))
}

# Output plots 
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
# Gaussian fit of Loss vs. each mean-temperature variable.
G_tmean <- DF %>%
  filter(Loss > 0) %>%
  select(Loss, contains('tmean'))

# BUG FIX: this section was copy-pasted and still referenced the unrelated
# `test` data frame (from the early distribution check) both when picking the
# predictor columns and as the plotting data; it now uses G_tmean throughout.
# The geom_smooth() start values are also aligned with the nls() fit (mu = 18,
# previously 16 in one place and 18 in the other).
response = names(G_tmean[c(2:4)]) 
quant <- 'Loss' 

# nls() provides the annotated coefficients; geom_smooth() refits for drawing.
scatter_fun = function(x, y) {
  
  # Fit the nonlinear model using nls
  model <- nls(Loss ~ k*exp(-1/2*(G_tmean[[x]]-mu)^2/sigma^2), 
               data = G_tmean, 
               start = c(mu = 18, sigma = 5, k = 10))
  
  # Extract coefficients (order: mu, sigma, k)
  coefficients <- coef(model)
  
  # Plot with fitted parameters annotated top-right
  ggplot(data = G_tmean, aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    geom_smooth(method = 'nls',
                color = 'blue',
                formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
                method.args = list(start = c(mu = 18, sigma = 5, k = 10)),
                se = FALSE) +
    theme_bw() +
    theme(panel.border = element_blank()) +
    annotate("text", 
             x = Inf, y = Inf, vjust = 2, hjust = 1,
             label = paste("Amplitude =", round(coefficients[3], 2),
                           ", Standard deviation =", round(coefficients[2], 2),
                           ", Mean =", round(coefficients[1], 2)))
}

# Output plots 
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
# Cumulative July + August precipitation vs Loss.
# FIX: plain vectorized addition replaces rowwise()/across() (identical result
# including NA propagation, no per-row overhead), and `..density..`
# (deprecated in ggplot2 3.4.0) becomes after_stat(density).
CUSUM_ppt <- DF %>% 
  filter(Loss > 0) %>%
  mutate(ppt_CUSUM = ppt_July + ppt_Aug)

ggplot(data = CUSUM_ppt, aes(x = ppt_CUSUM, y = Loss)) +
  geom_point() +
  # density trace rescaled (x2500) onto the loss axis
  stat_density(aes(x = ppt_CUSUM, y = 2500 * after_stat(density)), position = "identity", geom = "line", color = 'red', linewidth = 1.5)

# Clean up the exploratory objects.
# NOTE(review): some of these may already be gone depending on which sections
# were run (the knit log warned about results_df / G_tmean / G_tmin); the
# resulting rm() warnings are harmless but noisy.
rm(DF_2020, DF_2021, explore.plots, results_df, G_ppt, G_tmax, G_tmean, G_tmin, CUSUM_ppt) 

# SUMMARIZING CATEGORICAL RESPONSES ----

# Historical fungicide use ----

# Historical fungicide timing columns only (HistFungiStrat_*).
HF <- S %>% select(contains('Hist')) %>% select(!ends_with('Reported')) %>% select(!contains('Other'))

# '_Reported' columns are dropped because the data has already been cleaned.
# 'Other' columns are dropped because their entries either belonged in the
# existing categories or were nonsense.

# Non-missing answers per timing column (quick sanity check of coverage).
column_sums <- colSums(!is.na(HF))
print(column_sums)
##    HistFungiStrat_V10 HistFungiStrat_V10_14   HistFungiStrat_VTR1 
##                   123                    47                   412 
##     HistFungiStrat_R2 HistFungiStrat_PostR2   HistFungiStrat_None 
##                   179                    44                   163
# Recode to an indicator matrix: "Yes" -> 1, anything else (incl. NA) -> 0.
# (The !is.na() guard keeps NA comparisons from propagating into ifelse().)
CO <- as.data.frame(ifelse(!is.na(HF) & HF == "Yes", 1, 0))

# Remove rows with no coinfections by counting across data columns and filtering
# Per-respondent count of timing strategies selected.
# NOTE(review): the comment history says rows with no co-applications should be
# *filtered*, but no filter is applied — coinfection_check is computed and then
# never used. Also, CO remains a rowwise data frame after this step; verify
# that count() below behaves as intended on a rowwise frame.
CO <- CO %>%
  rowwise() %>%
  mutate(coinfection_check = sum(c_across(c("HistFungiStrat_V10",
                                            "HistFungiStrat_V10_14",
                                            "HistFungiStrat_VTR1",
                                            "HistFungiStrat_R2",
                                            "HistFungiStrat_PostR2",
                                            "HistFungiStrat_None" ))))

# One row per unique 0/1 combination of timings, with its frequency (n).
CO_matrix <- CO %>% 
  count(HistFungiStrat_V10,
        HistFungiStrat_V10_14,
        HistFungiStrat_VTR1, 
        HistFungiStrat_R2, 
        HistFungiStrat_PostR2, 
        HistFungiStrat_None)

# DO NOT OVERWRITE — the exported matrix was hand-corrected afterwards.
#write.xlsx(CO_matrix, '/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/Response data/CO_matrixMultiCounty.xlsx', overwrite=FALSE)

rm(CO, CO_matrix, HF)
# Load in fixed matrix

# Load the hand-corrected co-application matrix and render it as a heat map.
coinfection_data <- read_excel('/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/Response Data/CO_matrix FIXED.xlsx')
# First (unnamed) column holds the row labels.
colnames(coinfection_data)[1] <- 'X1'

# Melt the matrix so every timing combination becomes one row.
library(reshape2)
ReshapedData <- melt(coinfection_data, value.name = 'Responses')

# Shared level ordering for both axes of the heat map.
mylevels <- c('V10', 'V10-V14', 'VT/R1', 'R2', 'PostR2', 'None')
ReshapedData$X1 <- factor(ReshapedData$X1, levels = mylevels)
ReshapedData$variable <- factor(ReshapedData$variable, levels = mylevels)

# Heat map of response counts per timing pair.
ggplot(ReshapedData, aes(X1, variable, fill = Responses)) + 
  geom_tile(color = 'white')

rm(ReshapedData, coinfection_data)

# Increase in fungicide use ----

# Share of respondents reporting each fungicide-use change in 2020.
S_fungi_20 <- S %>%
  select(Role_Farmer, Location, contains('Change_2020')) %>%
  select(!contains('Rep')) %>%
  na.omit()

# Denominator: complete 2020 responses.
S_fungi_20_n <- nrow(S_fungi_20)

S_fungi_20 <- S_fungi_20 %>%
  group_by(FungiChange_2020) %>%
  summarise(Responses_2020 = n()) %>%
  mutate(perc = Responses_2020/S_fungi_20_n) %>%
  ungroup()

# Same for 2021.
S_fungi_21 <- S %>%
  select(Role_Farmer, Location, contains('Change_2021')) %>%
  select(!contains('Rep')) %>%
  na.omit()

S_fungi_21_n <- nrow(S_fungi_21)

S_fungi_21 <- S_fungi_21 %>%
  group_by(FungiChange_2021) %>%
  summarise(Responses_2021 = n()) %>%
  mutate(perc = Responses_2021/S_fungi_21_n) %>%
  ungroup()

# BUG FIX: a stray `S_fungi_21_n <- nrow(S_fungi_21)` re-assignment after the
# summarise() (which silently replaced the respondent count with the number of
# answer categories) has been removed.
# NOTE(review): cbind() assumes both summaries have the same categories in the
# same order, and it duplicates the `perc` column name — verify downstream use.
S_fungi <- cbind(S_fungi_20, S_fungi_21)

# Scouting ability ----

# Overall distribution of self-rated ability to find tar spot.
# ('FindTSLiklihood' [sic] is the actual column name in S.)
S_scout <- S %>%
  select(Location, FindTSLiklihood) %>%
  na.omit()

# Denominator: complete responses.
S_scout_n <- nrow(S_scout)

# Counts and percentages per likelihood category.
# NOTE(review): this overall table is immediately overwritten by the per-role
# table below without being printed or saved — confirm whether the overall
# summary was meant to be kept or displayed first.
S_scout <- S_scout %>%
  group_by(FindTSLiklihood) %>%
  summarise(Responses = n()) %>%
  mutate(Percent = round(Responses/S_scout_n, 2)*100) %>%
  ungroup()
# Per-role version: one row per (respondent role, likelihood) combination.
S_scout <- S %>%
  select(starts_with('Role'), FindTSLiklihood) %>%
  pivot_longer(cols = starts_with('Role'), names_to = 'Role', values_to = 'Role_logic') %>%
  na.omit()

# Count responses within each role, then within each role x likelihood cell,
# and convert to a percentage of the role's total; unique() collapses the
# per-row duplicates that mutate() leaves behind.
S_scout <- S_scout %>%
  select(!Role_logic) %>%
  group_by(Role) %>%
  mutate(Total = n()) %>%
  ungroup() %>%
  group_by(Role, FindTSLiklihood) %>%
  mutate(Responses = n()) %>%
  ungroup() %>%
  mutate(Percent = round(Responses/Total, 2)*100) %>%
  unique()
rm(S_scout, S_scout_n, S_fungi, S_fungi_20, S_fungi_21, S_fungi_20_n, S_fungi_21_n)

# USDA NASS DATA ----

# Read the USDA NASS chemical-use extracts for each survey year and stack
# them into one frame (identical column layouts assumed).
# Not including 2010 data because that data collection is multi-state only!
usda_files <- c(
  '/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2021.csv',
  '/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2018.csv',
  '/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2016.csv',
  '/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2014.csv'
)
USDA <- do.call(rbind, lapply(usda_files, read.csv))
rm(usda_files)
# Tidy the NASS extract down to fungicide totals by year/state/chemical.
USDA <- USDA %>%
  relocate(Year, .before = 'Domain') %>% # Move 'Year' column to before 'Domain'
  subset(Geo.Level != 'REGION : MULTI-STATE') %>% # Remove multi-state region rows
  relocate(State, .after = 'Year') %>% # Relocate 'State' to after 'Year'
  select(15:28) %>% # Only retain columns 15-28 (NOTE(review): positional — fragile if NASS changes its export layout)
  mutate(Name = str_extract(Domain.Category, "(?<=\\().*?(?=\\s*=)")) %>% # Chemical name = text between '(' and '='
  relocate(Name, .after = Domain.Category) %>% # Relocate Name after 'Domain.Category'
  subset(str_detect(Domain.Category, pattern = 'FUNGI')) %>% # Fungicide rows only
  select(!c('Domain', 'Domain.Category')) %>% # Drop the now-redundant domain columns
  subset(!is.na(Name)) %>% # Drop rows where no chemical name could be extracted
  select(!contains('CV')) # Drop coefficient-of-variation columns

# Human-readable column names for the eight remaining columns.
colnames(USDA) <- c('Year', 'State', 'Name', 
                     'Total_Fungicide_Apps_lb', 
                     'Average_Fungicide_Apps_lb.a', 
                     'Average_Fungicide_Apps_lb.a.y',
                     'Average_Number_Fungicide_Apps', 
                     'Average_Prop_Area_Treated_.')

# Drop NASS suppression/sentinel codes: ' (D)' withheld, ' (Z)' below half
# unit, ' (NA)', and blank cells.
USDA <- USDA %>%
  subset(!Total_Fungicide_Apps_lb %in% c(' (NA)', ' (D)', ' (Z)' , '')) %>%
  subset(!Average_Fungicide_Apps_lb.a %in% c(' (D)',' (NA)', ' (Z)', '')) %>%
  subset(!Average_Fungicide_Apps_lb.a.y %in% c(' (D)',' (NA)', ' (Z)', '')) %>%
  subset(!Average_Number_Fungicide_Apps %in% c(' (D)',' (NA)', ' (Z)', '')) %>%
  subset(!Average_Prop_Area_Treated_. %in% c(' (D)',' (NA)', ' (Z)', ''))

# FRAC group by active ingredient: QoIs (11), benzovindiflupyr (7), everything
# else coded '3' — NOTE(review): confirm all remaining chemicals really are
# FRAC group 3 (DMIs).
USDA <- USDA %>%
  mutate(FRAC = ifelse(Name %in% c('AZOXYSTROBIN', 'PYRACLOSTROBIN', 'TRIFLOXYSTROBIN'), '11',
                       ifelse(Name == 'BENZOVINDIFLUPYR', '7', '3'))) %>%
  relocate(FRAC, .after = Name)

# Numeric conversion (totals carry thousands separators).
# FIX: removed a no-op `USDA <- USDA` line that preceded this.
USDA$Total_Fungicide_Apps_lb <- as.numeric(gsub(",", "", USDA$Total_Fungicide_Apps_lb))
USDA[5:9] <- sapply(USDA[5:9], as.numeric)
# Metric columns the plotting sections iterate over.
response = names(USDA[c(5:9)])
response = set_names(response)

# State x FRAC ----

# Aggregate to Year x FRAC x State; `.groups = 'drop'` returns an ungrouped
# frame and silences the summarise() regrouping message.
P <- USDA %>%
  #subset(State %in% c('ILLINOIS', 'INDIANA', 'IOWA')) %>%
  group_by(Year, FRAC, State) %>%
  summarise(Total_Fungicide_Apps_lb = sum(Total_Fungicide_Apps_lb),
            Average_Fungicide_Apps_lb.a = mean(Average_Fungicide_Apps_lb.a),
            Average_Fungicide_Apps_lb.a.y = mean(Average_Fungicide_Apps_lb.a.y),
            Average_Number_Fungicide_Apps = mean(Average_Number_Fungicide_Apps),
            Average_Prop_Area_Treated_. = sum(Average_Prop_Area_Treated_.),
            .groups = 'drop')

# Year-vs-metric stacked column chart, filled by FRAC code, faceted by state.
scatter_fun = function(x, y) {
  ggplot(P, aes(x = .data[[x]], y = .data[[y]])) +
    geom_col(aes(fill = FRAC), color = 'black') + 
    scale_x_continuous(breaks = seq(1991, 2023, 2)) + 
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    facet_grid(~State)}

# One chart per metric in `response`.
explore.plots = map(response, ~scatter_fun('Year', .x))
explore.plots
## $Total_Fungicide_Apps_lb

## 
## $Average_Fungicide_Apps_lb.a

## 
## $Average_Fungicide_Apps_lb.a.y

## 
## $Average_Number_Fungicide_Apps

## 
## $Average_Prop_Area_Treated_.

# State x Fungicide ----

# Same aggregation as above but by chemical name, restricted to the three
# core states. `.groups = 'drop'` avoids leaving a grouped frame behind.
P1 <- USDA %>%
  subset(State %in% c('ILLINOIS', 'INDIANA', 'IOWA')) %>%
  #subset(State %in% c('ILLINOIS', 'INDIANA', 'IOWA', 'MICHIGAN', 'OHIO')) %>%
  group_by(Year, Name, State) %>%
  summarise(Total_Fungicide_Apps_lb = sum(Total_Fungicide_Apps_lb),
            Average_Fungicide_Apps_lb.a = mean(Average_Fungicide_Apps_lb.a),
            Average_Fungicide_Apps_lb.a.y = mean(Average_Fungicide_Apps_lb.a.y),
            Average_Number_Fungicide_Apps = mean(Average_Number_Fungicide_Apps),
            Average_Prop_Area_Treated_. = sum(Average_Prop_Area_Treated_.),
            .groups = 'drop')

# Year-vs-metric stacked column chart, filled by chemical, faceted by state.
scatter_fun = function(x, y) {
  ggplot(P1, aes(x = .data[[x]], y = .data[[y]])) +
    geom_col(aes(fill = Name), color = 'black') + 
    scale_x_continuous(breaks = seq(1991, 2023, 2)) + 
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    facet_grid(~State)}

# One chart per metric in `response`.
explore.plots = map(response, ~scatter_fun('Year', .x))
explore.plots
## $Total_Fungicide_Apps_lb

## 
## $Average_Fungicide_Apps_lb.a

## 
## $Average_Fungicide_Apps_lb.a.y

## 
## $Average_Number_Fungicide_Apps

## 
## $Average_Prop_Area_Treated_.